home *** CD-ROM | disk | FTP | other *** search
- #ifdef ALLOC /* With ALLOC defined, EXTERN expands to nothing and
- INIT() supplies an initializer, so every
- 'EXTERN ... INIT(x);' line below is a definition.
- Presumably exactly one source file defines ALLOC
- before including this header -- TODO confirm. */
- # define EXTERN
- # define INIT( _x ) = _x
- #else /* Without ALLOC the same lines are plain 'extern' declarations
- with no initializer. */
- # define EXTERN extern
- # define INIT( _x )
- #endif
-
- /*
- * Some OS dependent stuff
- */
- #define rfa_type long /* Type used for a record's file address
- (see the 'rfa' field of record_type). */
-
- #define SZ$_FILE_NAME 256 /* Note: '$' in an identifier is accepted only
- as a compiler extension, not by standard C. */
- #ifdef VOS /* 24-bit word */
- #define HI7 077400000 /* Hi 7 bits of a word (octal constant) */
- #else /* 32-bit word */
- #define HI7 0xFE000000 /* Hi 7 bits of a word */
- #endif
-
- /*
- * Cache Entry:
- *
- * This structure describes a single entry in the cache of lines.
- * The cache is organized as a linked list of cache entries; each entry
- * carries both a next and a prev link.
- * The entries at the front of the list are the most recently accessed.
- * The variables 'cache_head_ptr' and 'cache_tail_ptr' point to the
- * head and the tail of the cache.
- */
-
- struct cache_entry_struct;
- typedef struct cache_entry_struct cache_entry_type;
-
- struct cache_entry_struct {
-
- cache_entry_type * cache_next_ptr;/* Link to the next cache entry */
-
- cache_entry_type * cache_prev_ptr;/* Link to the prev cache entry */
-
- int hash_code; /* The hash code for the line. A value of
- HASH_FREE_ENTRY indicates this is a free
- cache entry. */
-
- int record_length; /* Length of the record */
-
- char *recordp; /* Actual record contents. */
-
- int record_alen; /* Allocated length of recordp buffer */
-
- };
-
- #define CACHE_ENTRIES 500 /* Total number of cache entries */
- EXTERN cache_entry_type * cache_head_ptr;/* Head of the cache linked list. */
- EXTERN cache_entry_type * cache_tail_ptr;/* Tail of the cache linked list. */
-
-
- #define LINE_LENGTH 135 /* Default line length */
- #define PAGE_LENGTH 66 /* Default page length (initial value of
- 'page_length') */
- #define HEAD_LENGTH 9 /* Number of lines of output page headings */
-
- /* Maximum number of characters in a record */
- /*
- * record_type:
- *
- * This structure describes a single record in a file.
- * The 'file_type' structure points to an array of these entries.
- */
-
- struct record_struct;
- typedef struct record_struct record_type;
-
- struct record_struct {
-
- rfa_type rfa; /* The record's file address. This is an
- operating system dependent value which is a
- token which can be used to seek to the
- specified record. */
-
- /*
- * The 'value' field describes the relationship between this record and
- * another record in another file. Valid values are:
- *
- * Negative: hash code for the record (See 'is_hash_code' macro).
- * A hash code is an index into the symbol table.
- * 0 or Positive: index into record array of other file.
- *
- * NOTE(review): the symbol table comment later in this file says hash
- * codes are positive; presumably they are stored negated in this
- * field -- confirm against the code that fills in 'value'.
- *
- * The defines below are used to describe whether the 'value[0]' or
- * 'value[1]' field is used to describe a relationship between files.
- */
-
- #define MAX_VALUE_SUB 2
-
- int value[MAX_VALUE_SUB];
- /* Describes the relationship between this
- record and another record in another file.
- */
-
-
- #define OLD_TO_NEW1 0 /* Old file: index into new1 */
- #define OLD_TO_NEW2 1 /* Old file: index into new2 */
- #define NEW1_TO_OLD 0 /* new1 file: index into old */
- #define NEW1_TO_NEW2 1 /* new1 file: index into new2 */
- #define NEW2_TO_OLD 0 /* new2 file: index into old */
- #define NEW2_TO_NEW1 1 /* new2 file: index into new1 */
-
- };
-
- /*
- * Record index values:
- *
- * Record indexes include a special record at the beginning of the file
- * and a special record at the end of the file. These definitions describe
- * that phenomenon.
- */
-
- #define BEGIN_INDEX 0 /* Index of the dummy begin record */
- #define DUMMY_RECORD_COUNT 2 /* Number of dummy records (the begin
- record and the end record) */
-
- /*
- * File Description:
- *
- * This structure describes a single input file.
- * A structure of this type occurs for the 'old' file, 'new1' file,
- * and 'new2' file.
- *
- * The FILE type used below comes from <stdio.h>, which is not included
- * in the visible part of this header, so it must be included first.
- */
-
- struct file_struct;
- typedef struct file_struct file_type;
-
- struct file_struct {
-
- char *name_ptr; /* Zero terminated name of file */
-
- char *text_ptr; /* Zero terminated text describing file */
-
- char *lw_ptr; /* Zero terminated last written date */
-
- FILE * seq_fd; /* Stream to use for sequential access */
-
- FILE * rnd_fd; /* Stream to use for random access */
-
- int record_array_size;/* number of lines in the file (including
- DUMMY_RECORD_COUNT). */
-
- int record_array_alloc;/* number of allocated entries in the
- record array. */
-
- #define RA_ORIG 5000 /* Original # of records in record array */
- #define RA_INCR 5000 /* Number of records to add on each increment */
-
- record_type * record; /* Allocated array of record descriptions.
- This field contains 0 if the file does not
- exist. (i.e., this is the third file in a
- two file comparison ) */
-
- /*
- * The entry below is actually the portion of the symbol table which
- * needs an entry for each file. The array is indexed by 'hash_code'.
- *
- * Each index below is an index into the array for the specified file.
- * Valid values are:
- *
- * 0: This line is not in this file.
- * negative: This line is not unique in the file.
- * Value is negative index to one of the records.
- * positive: This line occurs precisely once in the file.
- * Value is index to the record.
- */
-
- int *sym_tab_index; /* Index into 'record'. */
- /* There are 'sym_tab_size' elements in this array. */
- };
-
- #define OLD_FILE 0 /* Array index of 'old' file */
- #define NEW1_FILE 1 /* Array index of 'new1' file */
- #define NEW2_FILE 2 /* Array index of 'new2' file */
- #define MAX_FILE_COUNT 3 /* Maximum number of files */
-
- EXTERN int file_count; /* actual number of files */
-
- EXTERN file_type files[MAX_FILE_COUNT];/* Description of each file, indexed
- by OLD_FILE, NEW1_FILE and
- NEW2_FILE. */
-
- /*
- * For each record, six different relationships exist. That is,
- * for each of the three files there is a relationship to each of the
- * other two files.
- * The tables below describe the six relationships. All six tables are
- * indexed by the same relationship number (0 .. MATCH_COUNT-1), so the
- * order of the initializers must be kept in step across the tables.
- */
-
- #define MATCH_COUNT ( 2*MAX_FILE_COUNT )
-
- EXTERN int curr_file[MATCH_COUNT]
- #ifdef ALLOC
- = {
- OLD_FILE, OLD_FILE, NEW1_FILE, NEW1_FILE, NEW2_FILE, NEW2_FILE
- }
- #endif
- ;
-
- /* Array of subscripts into the 'files' array: the 'curr' file of each
- relationship */
-
- EXTERN int corres_file[MATCH_COUNT]
- #ifdef ALLOC
- = {
- NEW1_FILE, NEW2_FILE, OLD_FILE, NEW2_FILE, OLD_FILE, NEW1_FILE
- }
- #endif
- ;
- /* Array of subscripts into the 'files' array of the file which the 'curr' file is related to (the 'corres' file) */
-
-
- EXTERN int other_file[MATCH_COUNT]
- #ifdef ALLOC
- = {
- NEW2_FILE, NEW1_FILE, NEW2_FILE, OLD_FILE, NEW1_FILE, OLD_FILE
- }
- #endif
- ;
- /* Array of subscripts into the 'files' array of the file which is not
- involved in the current relationship */
-
- EXTERN int value_sub[MATCH_COUNT]
- #ifdef ALLOC
- = {
- OLD_TO_NEW1, OLD_TO_NEW2, NEW1_TO_OLD, NEW1_TO_NEW2,
- NEW2_TO_OLD, NEW2_TO_NEW1
- }
- #endif
- ;
- /* Array of subscripts to the 'value' array. This subscript identifies which
- of the two relationships is being tested. */
-
- EXTERN int rev_value_sub[MATCH_COUNT]
- #ifdef ALLOC
- = {
- NEW1_TO_OLD, NEW2_TO_OLD, OLD_TO_NEW1, NEW2_TO_NEW1,
- OLD_TO_NEW2, NEW1_TO_NEW2
- }
- #endif
- ;
- /* Array of subscripts to the 'value' array. This subscript identifies the
- relationship between the 'corres' file and the 'curr' file */
-
- EXTERN int other_value_sub[MATCH_COUNT]
- #ifdef ALLOC
- = {
- NEW2_TO_OLD, NEW1_TO_OLD, NEW2_TO_NEW1, OLD_TO_NEW1,
- NEW1_TO_NEW2, OLD_TO_NEW2
- }
- #endif
- ;
- /* Array of subscripts to the 'value' array. This subscript identifies the
- relationship between the 'other' file and the 'curr' file */
-
-
- #define UNIQUE_MATCH_COUNT (MATCH_COUNT / 2)
-
- EXTERN int unique_match[UNIQUE_MATCH_COUNT]
- #ifdef ALLOC
- = {
- 0, 1, 3
- }
- #endif
- ;
- /* Array of subscripts into the relation arrays defined above. These are the
- subscripts of the relations pairing each pair of files precisely once:
- 0 is old/new1, 1 is old/new2 and 3 is new1/new2. */
- /*
- * is_hash_code:
- *
- * This macro determines if the value in the record array is a hash code
- * or an index into another file array. This macro relies on the fact
- * that all hash codes stored in the record array are negative.
- *
- * Return value:
- * TRUE: The value represents a hash code
- * FALSE: The value represents an index into a file array.
- *
- * Parameter:
- * _value: The value from the file array.
- */
-
- #define is_hash_code( _value ) ((_value) < 0)
- /*
- * Options:
- *
- * The initial values given with INIT() take effect only in the file
- * compiled with ALLOC defined. 'bool', 'TRUE' and 'FALSE' are not
- * defined in the visible part of this header.
- */
-
- EXTERN bool blank_compress INIT (FALSE);
- /* TRUE if blank compression is desired */
-
- EXTERN bool blank_remove INIT (FALSE);/* TRUE if blank removal is desired */
-
- EXTERN bool compress_records INIT (FALSE);
- /* TRUE if any record compression needs to
- occur */
-
- EXTERN int prefix_lines INIT (5);/* Number of prefix lines */
-
- EXTERN int postfix_lines INIT (5);/* Number of postfix lines */
-
- EXTERN int page_length INIT (PAGE_LENGTH);/* Number of lines/page */
-
- EXTERN bool quiet_option INIT (FALSE);
- /* TRUE if COMBINE is to be quiet if there are
- no differences */
-
- EXTERN bool pa_debug INIT (FALSE);/* TRUE for generic debugging */
- EXTERN bool p1_debug INIT (FALSE);/* TRUE for debug of pass 1 */
- EXTERN bool p2_debug INIT (FALSE);/* TRUE for debug of pass 2 */
- EXTERN bool p3_debug INIT (FALSE);/* TRUE for debug of pass 3 */
- EXTERN bool p4_debug INIT (FALSE);/* TRUE for debug of pass 4 */
- EXTERN bool p5_debug INIT (FALSE);/* TRUE for debug of pass 5 */
-
- EXTERN bool statistics_flag INIT (FALSE);/* TRUE to output statistics */
-
- EXTERN bool hed_flag INIT (FALSE);/* TRUE to output hed file */
-
- EXTERN char exec_time[LINE_LENGTH];/* Begin execution time */
-
- /*
- * Column specifications:
- *
- * 'first_column' and 'last_column' are parallel arrays; 'column_count'
- * gives the number of column ranges actually in use.
- */
-
- #define MAX_COLUMNS 32 /* maximum number of column ranges */
-
- EXTERN int column_count INIT (0);/* Actual number of column ranges */
-
- EXTERN int first_column[MAX_COLUMNS];/* first column to compare */
- /* Column numbers are 0 relative */
-
- EXTERN int last_column[MAX_COLUMNS];/* last column to compare */
- /* Column numbers are 0 relative */
-
- /*
- * other_sub:
- *
- * This macro is given the subscript to one of the elements in the
- * 'value' array and returns the subscript to the other element
- * (0 gives 1, 1 gives 0).
- * This macro is heavily dependent on the fact that there are only
- * two elements in the value array.
- *
- * Return value:
- * other subscript
- *
- * Parameter:
- * _sub: Subscript into the value array.
- */
-
- #define other_sub( _sub ) ( 1 - (_sub) )
-
- /*
- * Primes: list of prime numbers.
- *
- * This array defines a set of prime numbers. For all multiples of 1024,
- * this table contains the prime number which is less than but closest
- * to that number.
- *
- * The list is terminated by a -1.
- *
- * NOTE(review): the last entry before the terminator, 224737, is well
- * below the next multiple of 1024 (220 * 1024 = 225280) and so does not
- * appear to follow the pattern of the other entries -- TODO confirm
- * whether that value is deliberate.
- *
- * Without ALLOC this is declared with no size, so sizeof cannot be applied
- * to it outside the defining file; use the -1 terminator to find the end.
- */
-
- EXTERN int primes[]
- #ifdef ALLOC
- = {
- 1021, 2039, 3067, 4093, 5119, 6143, 7159, 8191, 9209,
- 10223, 11261, 12281, 13309, 14327, 15359, 16381, 17401, 18427,
- 19447, 20479, 21503, 22511, 23549, 24571, 25589, 26597, 27647,
- 28669, 29683, 30713, 31741, 32749, 33791, 34807, 35839, 36857,
- 37879, 38903, 39929, 40949, 41983, 43003, 44029, 45053, 46073,
- 47093, 48121, 49139, 50159, 51199, 52223, 53239, 54269, 55291,
- 56311, 57331, 58367, 59387, 60413, 61417, 62459, 63487, 64499,
- 65521, 66553, 67579, 68597, 69623, 70639, 71671, 72701, 73727,
- 74747, 75773, 76781, 77813, 78839, 79867, 80863, 81919, 82939,
- 83939, 84991, 86011, 87037, 88037, 89087, 90107, 91129, 92153,
- 93179, 94207, 95231, 96233, 97259, 98299, 99317, 100343, 101363,
- 102397, 103423, 104417, 105467, 106487, 107509, 108541, 109567,
- 110587, 111611, 112621, 113657, 114679, 115693, 116731, 117757,
- 118757, 119797, 120829, 121853, 122869, 123887, 124919, 125941,
- 126967, 127997, 129023, 130043, 131071, 132071, 133117, 134129,
- 135151, 136189, 137209, 138239, 139241, 140281, 141311, 142327,
- 143357, 144383, 145399, 146423, 147451, 148471, 149503, 150523,
- 151549, 152567, 153589, 154621, 155627, 156671, 157679, 158699,
- 159739, 160757, 161783, 162791, 163819, 164839, 165887, 166909,
- 167917, 168943, 169957, 171007, 172031, 173053, 174079, 175103,
- 176123, 177131, 178169, 179173, 180221, 181243, 182261, 183289,
- 184309, 185327, 186343, 187387, 188407, 189439, 190409, 191473,
- 192499, 193513, 194543, 195581, 196597, 197621, 198647, 199679,
- 200699, 201709, 202751, 203773, 204797, 205823, 206827, 207869,
- 208891, 209917, 210943, 211949, 212987, 214009, 214993, 216061,
- 217081, 218111, 219133, 220151, 221173, 222199, 223229, 224251,
- 224737,
- -1
- }
- #endif
- ;
-
- /*
- * relate_type:
- *
- * This structure describes the relationship between a particular
- * record of a particular file and corresponding records in the other
- * files.
- *
- * This structure is built by 'pass5_analyze_relationship'. This structure
- * is used by all of the other pass5 routines to determine whether the
- * current record is the next one to be output.
- */
-
- struct relate_struct;
- typedef struct relate_struct relate_type;
-
- struct relate_struct {
-
- int index[MAX_FILE_COUNT];
- /* Index that this record appears at in the
- file. Value will be a hash code if this
- record is not in the corresponding file */
-
- bool current[MAX_FILE_COUNT];
- /* TRUE if the record is at the current position
- in the corresponding file */
-
- int relation; /* A summary of the relationship of this record
- to the current record in each file */
- /* The zeroeth element is represented in the least significant bit,
- etc. The INSERT_* values below are the possible bit combinations
- (bit 0 = old, bit 1 = new1, bit 2 = new2), plus INSERT_EOT. */
-
- #define INSERT_NONE 0
- #define INSERT_OLD 1
- #define INSERT_NEW1 2
- #define INSERT_OLD_NEW1 (INSERT_OLD + INSERT_NEW1)
- #define INSERT_NEW2 4
- #define INSERT_OLD_NEW2 (INSERT_OLD + INSERT_NEW2)
- #define INSERT_NEW1_NEW2 (INSERT_NEW1 + INSERT_NEW2)
- #define INSERT_OLD_NEW1_NEW2 (INSERT_OLD + INSERT_NEW1 + INSERT_NEW2)
- #define INSERT_EOT -1
-
- bool moved; /* TRUE if this record is involved in a record
- movement. */
-
- bool in_all; /* TRUE if the record is at the current
- position in all of the files. */
-
- };
-
- /*
- * Statistics:
- *
- * Counters; none of them has an INIT() value.
- */
-
- EXTERN int cache_miss; /* total number of cache misses. */
-
- EXTERN int hash_collisions;/* total number of hash collisions */
-
- EXTERN int old_new1_change_count;
- /* Number of differences between old and new1
- files */
-
- EXTERN int old_new2_change_count;
- /* Number of differences between old and new2
- files */
-
- EXTERN int new1_new2_change_count;
- /* Number of differences between new1 and new2
- files */
-
- /*
- * Symbol Table:
- *
- * This section describes the symbol table.
- * Each entry in the symbol table represents a record in one of the files.
- * The contents of each record are hashed.
- * The hash value is used as an index into the arrays.
- * If the hash value is not unique, a re-hash is performed until a
- * unique hash value is obtained.
- *
- * The symbol table is organized as four arrays of entries.
- * There is one array for each file (the 'sym_tab_index' field of
- * 'file_type') and one array of cache entry pointers described below.
- */
-
- /*
- * The index into the symbol table is a hash code. Hash codes are positive.
- * Significant hash codes include:
- *
- * 0: Not valid
- * 1: begin record
- * 2: end record
- * 3: eof (some archaic operating systems allow multiple eof's in a file.)
- */
-
- #define HASH_FREE_ENTRY 0 /* A hash code of this value indicates a free
- entry. This value is used in a cache entry
- to indicate an unused cache entry */
-
- /*
- * The cache ptr below is a pointer to the cache entry for the line.
- * Valid values are:
- *
- * CACHE_FREE_ENTRY: This symbol table entry is unused.
- * CACHE_NOT_IN_CACHE: This line is no longer in the cache.
- * positive: Pointer to cache entry.
- */
-
- EXTERN cache_entry_type * *sym_tab_cache_ptr;
- /* Pointer to table of pointers to cache
- entries */
-
- #define CACHE_FREE_ENTRY 0 /* Symbol table entry is unused */
- /* The code depends on the fact that the allocator zeros this entry upon
- allocation */
-
- #define CACHE_NOT_IN_CACHE -1 /* This line is no longer in the cache */
-
- EXTERN int sym_tab_size; /* Number of entries in the symbol table. */
- /*
- * Procedure forwards:
- *
- * These are old-style declarations: the empty parentheses say nothing
- * about the parameters, so the compiler does not check the arguments
- * of calls against them. The definitions are not visible here.
- */
-
- void dump_arrays() ;
- void dump_statistics() ;
- void dump_sym_tab() ;
-
- void error() ;
- char * mem_alloc() ;
- void init() ;
- void link_records() ;
-
- void pass1() ;
- int pass1_read_record() ;
- void pass1_record_compress() ;
-
- void pass2() ;
-
- void pass3() ;
- void pass3_scan() ;
-
- void pass4() ;
- void pass4_scan() ;
-
- void pass5() ;
- void pass5_analyze_relationship() ;
- int pass5_move() ;
- void pass5_dump_record() ;
- void pass5_write_hed() ;
- void pass5_write_listing() ;
- void pass5_write_listing_line() ;
- void pass5_write_listing_head() ;
-